##########################################
# Refugee Exposure, Elections, Development
# Afrobarometer Data Clean
# May 24, 2021
##########################################

##########################################
# Jan 23, add AB R8
##########################################

## Clean afrobarometer data and calculate refugee exposure
## Switch sf off its s2 (spherical geometry) backend for every sf operation
## below. The call is namespaced with `sf::` because it runs before the
## library() calls that attach sf.
sf::sf_use_s2(FALSE)

library(tidyverse)
library(rgdal)
library(raster)
library(sf)
library(doMC)
library(haven)
library(estimatr)
library(geosphere)
library(readxl)

## Outcome variables that get blanked (set to NA) on the 2015 rows, so that
## for these outcomes only the 2016 AB round contributes. The list covers all
## outcomes other than migrants_movement_standardized, which is left as is.
outcome_vars <- c(
  "migrants_attitude_standardized",
  "feel_unsafe_standardized",
  "feared_crime_standardized",
  "presidential_approval_standardized",
  "partisanship_nrm_standardized",
  "trust_president_standardized",
  "trust_rulling_party_standardized",
  "born_non_ugandans_standardized",
  "foreigner_residents_standardized",
  "wealth_index",
  "gvmt_perf_index"
)

# ab_data_orig <- read_csv("ab_final_v4_geocoded_with_index.csv") %>%
#   mutate(key = paste0(year, respno))

## Read the geocoded AB file, add a year + respno key, and overwrite the
## listed outcomes with NA wherever year == 2015 (other rows are unchanged).
ab_data <- read.csv("ab_final_v6_geocoded_with_index.csv") %>%
  mutate(
    key = paste0(year, respno),
    across(
      all_of(outcome_vars),
      ~ case_when(
        year == 2015 ~ NA_real_,
        TRUE ~ .x
      )
    )
  )
# ab_data <- ab_data[match(ab_data_orig$key, ab_data$key),]

## ---------------
## Load shapefiles
## ---------------
## Main settlement boundaries. The two extra layers below are reprojected
## onto this layer's CRS.
refsites <- readOGR(path.expand("ugakaimug"), "settlement_boundaries")

## Mungula site which was newly setup. Its attribute table is replaced by
## the three columns District, Name_setlm and Sqkm, filled with constants.
refsites_mungula <- spTransform(
  readOGR(path.expand("UGA_mungula_ii"), "UGA_mungula_ii"),
  crs(refsites)
)
refsites_mungula@data <- dplyr::select(
  mutate(
    refsites_mungula@data,
    Name_setlm = "Mungula II",
    District = "Adjumani",
    Sqkm = 10.279261
  ),
  District, Name_setlm, Sqkm
)

## Kitgum and Okollo sites
refsites_kitgum_okollo <- spTransform(
  readOGR(path.expand("Kitgum and Okollo"), "Boundaries_Kitgum_Okollo"),
  crs(refsites)
)

## Clean up refsites names: overwrite the names at fixed row positions of the
## attribute table (rows 4-5, 17-18 and 30-39).
refsites@data$Name_setlm <- as.character(refsites@data$Name_setlm)
refsites@data$Name_setlm[c(4, 5, 17, 18, 30:39)] <- c(
  "Oliji I", "Oliji II",
  "Maaji 1A", "Maaji 1B",
  paste(
    "Palorinya",
    c("I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X")
  )
)

## -------------------------------
## Get distance to nearest refsite
## -------------------------------
## sf versions of the three settlement layers, plus the AB respondents as
## points built from longitude/latitude and labelled with the settlement CRS.
refsites_sf <- st_as_sf(refsites)
refsites_ko_sf <- st_as_sf(refsites_kitgum_okollo)
refsites_mungula_sf <- st_as_sf(refsites_mungula)
ab_sf <- st_as_sf(
  ab_data,
  coords = c("longitude", "latitude"),
  crs = st_crs(refsites_sf)
)

## The three distance tables are read from CSVs cached by the commented-out
## generators below (each generator divides st_distance() output by 1000).

# ## WARNING:
# ##     TAKES HOURS TO RUN
# registerDoMC(detectCores()-1)
# refsites_dist <- foreach(i = 1:nrow(ab_sf), .combine = "rbind") %dopar% {
#     out <- st_distance(ab_sf[i,], refsites_sf)
#     if(i %% 100 == 0){
#         print(paste0("Done with ", i, " parishes at ", Sys.time(), "."))
#     }
#     return(out/1000)
# }
# write_csv(as.data.frame(refsites_dist), file = "ab_distance_to_refsite.csv")

refsites_dist <- read_csv("ab_distance_to_refsite.csv")

# ## Add in kitgum and okollo
# registerDoMC(detectCores()-1)
# refsites_ko_dist <- foreach(i = 1:nrow(ab_sf), .combine = "rbind") %dopar% {
#     out <- st_distance(ab_sf[i,], refsites_ko_sf)
#     if(i %% 100 == 0){
#         print(paste0("Done with ", i, " parishes at ", Sys.time(), "."))
#     }
#     return(out/1000)
# }
# write_csv(as.data.frame(refsites_ko_dist), file = "ab_distance_to_refsite_kitgum_okollo.csv")

refsites_ko_dist <- read_csv("ab_distance_to_refsite_kitgum_okollo.csv")

# ## Add in Mungula II
# registerDoMC(detectCores()-1)
# refsites_mungula_dist <- foreach(i = 1:nrow(ab_sf), .combine = "rbind") %dopar% {
#     out <- st_distance(ab_sf[i,], refsites_mungula_sf)
#     if(i %% 100 == 0){
#         print(paste0("Done with ", i, " parishes at ", Sys.time(), "."))
#     }
#     return(out/1000)
# }
# write_csv(as.data.frame(refsites_mungula_dist), file = "ab_distance_to_refsite_mungula.csv")

refsites_mungula_dist <- read_csv("ab_distance_to_refsite_mungula.csv")

## Put together: one column per settlement, named "<settlement> Distance",
## in the order main layer, Kitgum/Okollo layer, Mungula II.
refsites_dist <- setNames(
  bind_cols(refsites_dist, refsites_ko_dist, refsites_mungula_dist),
  paste(
    c(
      refsites@data$Name_setlm,
      as.character(refsites_kitgum_okollo@data$Name),
      refsites_mungula@data$Name_setlm
    ),
    "Distance"
  )
)

## ----------------------
## Get distance to border
## ----------------------
## Country boundary layer; only the commented-out generator below uses it.
uganda_boundary <- readOGR(
  path.expand("Uganda_countryboundaries_adm0"),
  "uga_admbnda_adm0_UBOS_v2"
)

# ## WARNING:
# ##     TAKES HOURS TO RUN
# ## Calculate distance
# registerDoMC(detectCores()-1)
# border_dist <- foreach(i = 1:nrow(ab_data), .combine = "rbind") %dopar% {
#     ab_sp <- SpatialPoints(ab_data[i,c("longitude", "latitude")], proj4string = crs(ab_sf))
#     out <- dist2Line(ab_sp, uganda_boundary)
#     if(i %% 100 == 0){
#         print(paste0("Done with ", i, " parishes at ", Sys.time(), "."))
#     }
#     return(out[,1]/1000)
# }
# write_csv(as.data.frame(border_dist), file = "ab_distance_to_border.csv")

## Load the cached result and name its column "borderdist".
border_dist <- setNames(read_csv("ab_distance_to_border.csv"), "borderdist")

## --------------------
## Get distance to road
## --------------------
## Road network, reprojected to the CRS of the AB points, in both sp and sf
## form.
uganda_road <- spTransform(
  readOGR(path.expand("Uganda_roads_feb2009"), "Uganda_Roads_Feb2009"),
  crs(ab_sf)
)
uganda_road_sf <- st_as_sf(uganda_road)

# ## WARNING:
# ##     TAKES HOURS TO RUN
# ## Calculate distance
# registerDoMC(detectCores()-1)
# road_dist <- foreach(i = 1:nrow(ab_sf), .combine = "rbind") %dopar% {
#     out <- st_distance(ab_sf[i,], uganda_road_sf)
#     if(i %% 100 == 0){
#         print(paste0("Done with ", i, " parishes at ", Sys.time(), "."))
#     }
#     return(min(as.numeric(out), na.rm = TRUE)/1000)
# }
# write_csv(as.data.frame(road_dist), file = "ab_distance_to_road.csv")

## Load the cached result and name its column "roaddist".
road_dist <- setNames(read_csv("ab_distance_to_road.csv"), "roaddist")

## -----------------------
## Get distance to capital
## -----------------------
## Single point for Kampala (x = 32.595242, y = 0.310841) in the CRS of the
## AB points; only the commented-out generator below uses it.
kampala_coord <- st_sfc(
  st_point(x = c(32.595242, 0.310841)),
  crs = st_crs(ab_sf)
)

# registerDoMC(detectCores()-1)
# cap_dist <- foreach(i = 1:nrow(ab_sf), .combine = "rbind") %dopar% {
#     out <- st_distance(ab_sf[i,], kampala_coord)
#     if(i %% 100 == 0){
#         print(paste0("Done with ", i, " parishes at ", Sys.time(), "."))
#     }
#     return(out[1,1]/1000)
# }
# write_csv(as.data.frame(cap_dist), file = "ab_distance_to_capital.csv")

## Load the cached result and name its column "capitoldist".
cap_dist <- setNames(read_csv("ab_distance_to_capital.csv"), "capitoldist")

## -------------------
## Merge onto AB data
## -------------------
## D = District
## C = County
## S = Sub-county

## Column-bind the distance tables onto the AB rows. The bind is positional,
## so it relies on every cached table having its rows in ab_data's order.
ab_data <- ab_data %>%
  bind_cols(refsites_dist) %>%
  bind_cols(border_dist) %>%
  bind_cols(road_dist) %>%
  bind_cols(cap_dist)
ab_data$P_02_ID <- as.character(ab_data$P_02_ID)


## -------------------------
## Get min distance variable
## -------------------------
## Row-wise minimum over every "<settlement> Distance" column.
ab_data <- mutate(
  ab_data,
  min_distance = pmap_dbl(
    dplyr::select(ab_data, ends_with(" Distance")),
    min
  )
)

## ----------------------------
## Change the years to match AB
## ----------------------------

## In 2015 there is one question we want, for all others we want 2016
## Migrant movement

## Recode `year` through a lookup: 2005 -> 2001, 2008 -> 2006, 2011 -> 2011,
## 2015 -> 2016, 2016 -> 2016, 2019 -> 2020. Any other value becomes NA.
ab_data$year <- c(2001, 2006, 2011, 2016, 2016, 2020)[
  match(ab_data$year, c(2005, 2008, 2011, 2015, 2016, 2019))
]

## --------------------------------
## Merge in the populations by year
## --------------------------------
## Settlement populations in long form: one row per settlement and year.
refsites_pop <- read_csv("uga_refsites_population_final_analysis.csv") %>%
  dplyr::select(Name_setlm, Year, `Refugee Population`)

## Add in population data for Mungula II (2020), with zero population in
## the earlier years.
refsites_pop <- bind_rows(
  refsites_pop,
  tibble(
    Name_setlm = "Mungula II",
    Year = c(2020, 2015, 2010, 2005, 2000),
    `Refugee Population` = c(1593, 0, 0, 0, 0)
  )
)

## Relabel every 2020 row as 2019.
refsites_pop$Year[refsites_pop$Year == 2020] <- 2019

## Correct population in Adjumani: 2019 values keyed by settlement name.
## Mungula II is not listed because its 2019 value (1593) is set above.
adjumani_2019 <- c(
  "Nyumanzi" = 40551,
  "Pagirinya" = 36768,
  "Ayilo I" = 25893,
  "Maaji II" = 17365,
  "Maaji III" = 15366,
  "Ayilo II" = 14506,
  "Boroli 1" = 10049,
  "Agojo" = 7054,
  "Baratuku" = 6921,
  "Mirieyi" = 6825,
  "Alere" = 6764,
  "Olua I" = 5487,
  "Boroli 2" = 5124,
  "Mungula I" = 4941,
  "Olua II" = 4298,
  ## the new data only has Oliji as a whole with 1414 population
  "Oliji I" = 707,
  "Oliji II" = 707,
  "Elema" = 989,
  ## the new data only has Maaji I as a whole with 515 population, split
  ## 257 / 258 (the earlier value of 2578 for Maaji 1B was a typo)
  "Maaji 1A" = 257,
  "Maaji 1B" = 258
)

## which() drops rows whose Year or name is NA, so the vector-valued
## assignment below never sees an NA index.
rows_2019 <- which(
  refsites_pop$Year == 2019 &
    refsites_pop$Name_setlm %in% names(adjumani_2019)
)
refsites_pop$`Refugee Population`[rows_2019] <-
  unname(adjumani_2019[refsites_pop$Name_setlm[rows_2019]])


## Check names: settlement names taken from the distance columns should
## agree with those in the population table (both lines should give TRUE).
all_sites <- gsub(" Distance", "", colnames(refsites_dist))

all(refsites_pop$Name_setlm %in% all_sites)
all(all_sites %in% refsites_pop$Name_setlm)

## Widen to one row per year and one column per settlement, shift Year
## forward by one, and suffix every settlement column with " Population".
refsites_pop <- mutate(
  pivot_wider(
    refsites_pop,
    names_from = Name_setlm,
    values_from = `Refugee Population`
  ),
  Year = Year + 1
)
names(refsites_pop)[-1] <- paste(names(refsites_pop)[-1], "Population")

## Final data merge: attach the population columns by (recoded) survey year.
ab_data <- left_join(ab_data, refsites_pop, by = c("year" = "Year"))
nrow(ab_data)

## --------------------
## Fix nearest distance
## --------------------
## Long table with one row per respondent (round, respno) and camp, holding
## that camp's distance and population. Built by reshaping the distance
## columns, reshaping the population columns, and joining the two on their
## shared columns.
nd_df <- ab_data %>%
  dplyr::select(round, respno, ends_with(" Distance")) %>%
  pivot_longer(
    cols = ends_with(" Distance"),
    names_to = "camp",
    values_to = "distance"
  ) %>%
  mutate(camp = gsub(" Distance", "", camp)) %>%
  inner_join(
    ab_data %>%
      dplyr::select(round, respno, ends_with(" Population")) %>%
      pivot_longer(
        cols = ends_with(" Population"),
        names_to = "camp",
        values_to = "population"
      ) %>%
      mutate(camp = gsub(" Population", "", camp))
  )

## Nearest camp distance per respondent, counting only camps whose
## population is above zero.
min_distance_fixed <- summarize(
  group_by(filter(nd_df, population > 0), round, respno),
  min_distance = min(distance)
)

## Swap the earlier min_distance column for the corrected one.
ab_data <- left_join(
  dplyr::select(ab_data, -min_distance),
  min_distance_fixed
)

## -------------------
## Create new measures
## -------------------
## Respondent identifiers plus every per-camp distance and population column.
exposure_measure_df <- dplyr::select(
  ab_data,
  round, respno, ends_with(" Distance"), ends_with(" Population")
)

## Make long df: one row per respondent and camp, with `distance` and
## `population` side by side (the two halves are joined on shared columns).
exposure_measure_df <- exposure_measure_df %>%
  dplyr::select(-ends_with(" Population")) %>%
  pivot_longer(
    cols = ends_with(" Distance"),
    names_to = "camp",
    values_to = "distance"
  ) %>%
  mutate(camp = gsub(" Distance", "", camp)) %>%
  inner_join(
    exposure_measure_df %>%
      dplyr::select(-ends_with(" Distance")) %>%
      pivot_longer(
        cols = ends_with(" Population"),
        names_to = "camp",
        values_to = "population"
      ) %>%
      mutate(camp = gsub(" Population", "", camp))
  )

## Make measures - all camps
## Per-camp exposure is population / (distance + 1). Only camps with
## population above zero are kept; the summaries are then computed within
## each respondent (round, respno) and repeated on each of its rows.
exposure_measure_df_o_all <- exposure_measure_df %>%
  mutate(
    exposure = population / (distance + 1),
    exposure_ln = log(exposure)
  ) %>%
  filter(population > 0) %>%
  group_by(round, respno) %>%
  mutate(
    ## Total exposure from camps within each radius (0 if none qualify)
    sum_exposure_20km_rad = coalesce(sum(exposure[distance < 20]), 0),
    sum_exposure_50km_rad = coalesce(sum(exposure[distance < 50]), 0),
    sum_exposure_100km_rad = coalesce(sum(exposure[distance < 100]), 0),
    ## Mean exposure from camps within each radius; missing means become 0
    avg_all_exposure_20 = coalesce(mean(exposure[distance < 20]), 0),
    avg_all_exposure_50 = coalesce(mean(exposure[distance < 50]), 0),
    avg_all_exposure_100 = coalesce(mean(exposure[distance < 100]), 0),
    avg_all_exposure_150 = coalesce(mean(exposure[distance < 150]), 0),
    avg_all_exposure_200 = coalesce(mean(exposure[distance < 200]), 0),
    avg_all_exposure_full = coalesce(mean(exposure), 0),
    ## Logged means
    avg_all_exposure_ln_100 = log(avg_all_exposure_100),
    avg_all_exposure_ln_150 = log(avg_all_exposure_150),
    avg_all_exposure_ln_200 = log(avg_all_exposure_200),
    avg_all_exposure_ln_full = log(avg_all_exposure_full),
    ## Infinite logs (e.g. log(0)) are turned into NA, then NA into 0
    across(
      starts_with("avg_all_exposure_ln"),
      ~ coalesce(
        case_when(is.infinite(.x) ~ NA_real_, TRUE ~ .x),
        0
      )
    )
  ) %>%
  ## Now keep only the camp(s) at the smallest distance
  filter(distance == min(distance)) %>%
  ## To break any remaining ties, take the largest settlement
  filter(population == max(population)) %>%
  dplyr::select(-c(camp, distance, population, starts_with("D_02_ID"))) %>%
  rename(nearest_exposure = exposure, nearest_exposure_ln = exposure_ln) %>%
  distinct()

## Merge the exposure measures back onto the AB data. No `by` is given, so
## the join uses whatever columns the two tables share (expected to be round
## and respno -- confirm from the join message). Because this is an inner
## join, respondents without a row in exposure_measure_df_o_all are dropped;
## the row count is shown as a check.
ab_data <- ab_data %>%
  inner_join(exposure_measure_df_o_all)
nrow(ab_data)

## Output merged dataset
## `file =` replaces the deprecated `path =` argument of write_csv().
write_csv(ab_data, file = "ab_data_merged.csv")
